import pandas as pd
import datetime
from datetime import date,timedelta
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Use the plain white Plotly theme for every figure created below.
pio.templates.default = "plotly_white"

# Load the control and test campaign exports; both files are
# semicolon-delimited.
data_control, data_test = (
    pd.read_csv(csv_path, sep=";")
    for csv_path in ("control_group.csv", "test_group.csv")
)
# Apply the same ten column labels, by position, to both campaign frames so
# that they share one schema.
COLUMN_LABELS = [
    "Campaign Name",
    "Date",
    "Amount Spent",
    "Number of Impressions",
    "Reach",
    "Website Clicks",
    "Searches Received",
    "Content Viewed",
    "Added to Cart",
    "Purchases",
]
for campaign_frame in (data_control, data_test):
    campaign_frame.columns = COLUMN_LABELS

# Preview the relabelled control group.
data_control.head()
| | Campaign Name | Date | Amount Spent | Number of Impressions | Reach | Website Clicks | Searches Received | Content Viewed | Added to Cart | Purchases |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Control Campaign | 1.08.2019 | 2280 | 82702.0 | 56930.0 | 7016.0 | 2290.0 | 2159.0 | 1819.0 | 618.0 |
| 1 | Control Campaign | 2.08.2019 | 1757 | 121040.0 | 102513.0 | 8110.0 | 2033.0 | 1841.0 | 1219.0 | 511.0 |
| 2 | Control Campaign | 3.08.2019 | 2343 | 131711.0 | 110862.0 | 6508.0 | 1737.0 | 1549.0 | 1134.0 | 372.0 |
| 3 | Control Campaign | 4.08.2019 | 1940 | 72878.0 | 61235.0 | 3065.0 | 1042.0 | 982.0 | 1183.0 | 340.0 |
| 4 | Control Campaign | 5.08.2019 | 1835 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Preview the first rows of the relabelled test group.
data_test.head()
| | Campaign Name | Date | Amount Spent | Number of Impressions | Reach | Website Clicks | Searches Received | Content Viewed | Added to Cart | Purchases |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Test Campaign | 1.08.2019 | 3008 | 39550 | 35820 | 3038 | 1946 | 1069 | 894 | 255 |
| 1 | Test Campaign | 2.08.2019 | 2542 | 100719 | 91236 | 4657 | 2359 | 1548 | 879 | 677 |
| 2 | Test Campaign | 3.08.2019 | 2365 | 70263 | 45198 | 7885 | 2572 | 2367 | 1268 | 578 |
| 3 | Test Campaign | 4.08.2019 | 2710 | 78451 | 25937 | 4216 | 2216 | 1437 | 566 | 340 |
| 4 | Test Campaign | 5.08.2019 | 2297 | 114295 | 95138 | 5863 | 2106 | 858 | 956 | 768 |
# Count the missing values in each column of the control group.
data_control.isnull().sum()
Campaign Name            0
Date                     0
Amount Spent             0
Number of Impressions    1
Reach                    1
Website Clicks           1
Searches Received        1
Content Viewed           1
Added to Cart            1
Purchases                1
dtype: int64
# Fill the gaps in the control group's numeric metrics with each column's mean.
#
# The filled column is assigned back to the frame rather than calling
# `fillna(..., inplace=True)` on `data_control[column]`: that form is chained
# assignment on a column selection, which recent pandas versions warn about
# and which no longer updates the parent frame when Copy-on-Write is enabled.
#
# Only the metric columns are listed; "Campaign Name" and "Date" hold text,
# and "Amount Spent" has no missing values.
columns_to_impute = [
    "Number of Impressions",
    "Reach",
    "Website Clicks",
    "Searches Received",
    "Content Viewed",
    "Added to Cart",
    "Purchases",
]
for column in columns_to_impute:
    data_control[column] = data_control[column].fillna(
        data_control[column].mean()
    )
# Stack the two campaigns into a single frame. The control and test rows carry
# different "Campaign Name" values in the data shown, so the former all-column
# outer merge never matched any rows; a plain concatenation yields the same
# rows without pandas' "merging on int and float columns" warning.
data = pd.concat([data_control, data_test], ignore_index=True)

# "Date" holds day.month.year strings, so sorting the raw text is lexicographic
# ("10.08.2019" would come before "2.08.2019"). Sort on the parsed dates
# instead; the column itself is left as strings. The stable sort keeps the
# control row ahead of the test row for the same day.
data = data.sort_values(
    "Date",
    key=lambda dates: pd.to_datetime(dates, format="%d.%m.%Y"),
    kind="mergesort",
).reset_index(drop=True)
data.head()
C:\Users\kevin\anaconda3\lib\site-packages\pandas\core\reshape\merge.py:1207: UserWarning: You are merging on int and float columns where the float values are not equal to their int representation. warnings.warn(
| | Campaign Name | Date | Amount Spent | Number of Impressions | Reach | Website Clicks | Searches Received | Content Viewed | Added to Cart | Purchases |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Control Campaign | 1.08.2019 | 2280 | 82702.0 | 56930.0 | 7016.0 | 2290.0 | 2159.0 | 1819.0 | 618.0 |
| 1 | Test Campaign | 1.08.2019 | 3008 | 39550.0 | 35820.0 | 3038.0 | 1946.0 | 1069.0 | 894.0 | 255.0 |
| 2 | Test Campaign | 10.08.2019 | 2790 | 95054.0 | 79632.0 | 8125.0 | 2312.0 | 1804.0 | 424.0 | 275.0 |
| 3 | Control Campaign | 10.08.2019 | 2149 | 117624.0 | 91257.0 | 2277.0 | 2475.0 | 1984.0 | 1629.0 | 734.0 |
| 4 | Test Campaign | 11.08.2019 | 2420 | 83633.0 | 71286.0 | 3750.0 | 2893.0 | 2617.0 | 1075.0 | 668.0 |
# Scatter of spend against impressions: one marker per row of `data`, coloured
# by campaign and sized by the amount spent, with an OLS trendline requested
# from plotly.express.
figure = px.scatter(
    data_frame=data,
    x="Number of Impressions",
    y="Amount Spent",
    size="Amount Spent",
    color="Campaign Name",
    trendline="ols",
)
figure